- 
                Notifications
    You must be signed in to change notification settings 
- Fork 802
[WIP] Pagination #806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[WIP] Pagination #806
Conversation
| When is the pagination branch going to be merged to master and upgrade elasticsearch-dsl from 7.1.0 to say 7.1.1?? I am currently using elasticsearch-dsl==7.1.0 which does not have the pagination feature and I need it badly. Please merge! | 
| I also would love to see this landed. This is a feature we'd like to use. | 
| @honzakral has this been WIP since 2018, or is it ready and waiting for something? Is there something left to do on it? Maybe I can help if that is the case. I would really love to see this feature around. | 
| Any updates on this? | 
| If anyone else, like me, is waiting with bated breath for a pagination feature, here's what I believe is a working solution in the meantime (I haven't thoroughly tested it, but I haven't found any problems yet). This is a slight expansion on the code in #806 and adds some functionality to the get_page function, allowing it to select the best option among starting from the beginning of the search, starting from the end, or starting from the user-provided values from the current page. If the target page is more than 10,000 results away from the closest reference page, this function will page through the intermediate results 10,000 at a time until it gets within 10,000, then go through the remaining intermediate results, and finally grab the target page. It's not a perfect solution and it might be problematic with paging through the middle of very large result sets, but short of that I think it will do pretty well. Code here:
def _reverse_sort_entry(self, sort_entry):
    # "field"
    if isinstance(sort_entry, string_types):
        if sort_entry == '_score':
            return {'_score': 'asc'}
        return {sort_entry: 'desc'}
    f, sort_entry = sort_entry.copy().popitem()
    # {"field": "asc/desc"}
    if isinstance(sort_entry, string_types):
        return {f: 'asc' if sort_entry == 'desc' else 'desc'}
    # {"field": {"order": "asc/desc"}}
    sort_entry = sort_entry.copy()
    sort_entry['order'] = 'asc' if sort_entry['order'] == 'desc' else 'desc'
    return {f: sort_entry}
def get_page_count(self, size=None):
    """Return how many pages of ``size`` documents the search spans.

    :param size: Documents per page. When ``None``, the ``"size"`` entry of
        ``self._extra`` is used, falling back to 10.
    :return: The page count, rounded up so that a partially filled last page
        is included. A ``size`` of 0 yields 0 without calling ``count()``.
    """
    if size is None:
        size = self._extra.get("size", 10)
    # Guard first: a zero page size would otherwise divide by zero.
    if size == 0:
        return 0
    full_pages, remainder = divmod(self.count(), size)
    return full_pages + 1 if remainder else full_pages
def get_sort_vals(self, hit, sorters):
    """Collect, for each sort entry, the matching value stored on a hit.

    Each entry of ``sorters`` is either a field name or a one-key dict whose
    key is the field name. A trailing ``.keyword`` is stripped from the name
    before it is looked up in ``hit.to_dict()``.

    :param hit: An elasticsearch hit
    :type hit: Hit
    :param sorters: A list of the properties used to sort the search
    :type sorters: List
    :return: One value per sort entry, ``None`` where the hit has no such key
    :rtype: List
    """
    suffix = '.keyword'
    document = hit.to_dict()
    values = []
    for sorter in sorters:
        if isinstance(sorter, dict):
            # If a dict carries several keys, the last one iterated wins.
            field = sorter
            for key in sorter.keys():
                field = key[:-len(suffix)] if key.endswith(suffix) else key
        elif sorter.endswith(suffix):
            field = sorter[:-len(suffix)]
        else:
            field = sorter
        values.append(document.get(field))
    return values
def get_page(self, page_no, size=None, current_page=0, first_row=None, last_row=None):
    """Fetch one page of results, including pages beyond the 10,000 window.

    Pages are 1-based; a negative ``page_no`` counts from the end (``-1`` is
    the last page). The page is fetched with the cheapest strategy available:

    1. plain ``from``/``size`` when the page lies within the first 10,000
       results;
    2. ``from``/``size`` on the reversed sort when it lies within the last
       10,000 results;
    3. otherwise a ``search_after`` walk, in chunks of at most 10,000
       records, from whichever reference point needs the fewest skipped
       records: the start of the results, the end of the results, or the
       caller-supplied row of the current page.

    :param page_no: 1-based page number, or a negative number counting from
        the end.
    :param size: Documents per page. When ``None``, the ``"size"`` entry of
        ``self._extra`` is used, falling back to 10.
    :param current_page: Positive number of the page the caller currently
        holds; 0 means there is no current page.
    :param first_row: First hit of the current page. Used as the anchor when
        the target page lies before ``current_page``.
    :param last_row: Last hit of the current page. Used as the anchor when
        the target page lies after ``current_page``.
    :return: The response of the final search request. A page outside the
        result set yields a response without hits.
    :raises ValueError: If ``page_no`` is 0.
    """
    if page_no == 0:
        raise ValueError("Search pagination is 1-based.")
    size = size if size is not None else self._extra.get("size", 10)
    s = self._clone()
    # search_after needs a tie-breaker so no two rows sort identically;
    # this code uses the 'id' field for that.
    if 'id' not in s._sort:
        s._sort.append('id')
    # Deepest from + size this code will request in a single search.
    max_window = 10000

    def fetch_window(search, offset, limit):
        # Run ``search`` as a plain from/size request.
        search._extra["from"] = offset
        search._extra["size"] = limit
        return search.execute()

    # Shallow page counted from the front: one request, no count needed.
    # Negative page numbers must not take this path (they would produce a
    # negative offset), except for size 0 where the offset is always 0.
    if size == 0 or (page_no > 0 and page_no * size <= max_window):
        return fetch_window(s, size * (page_no - 1), size)

    total = s.count()
    total_pages = -(-total // size)  # ceiling division
    if page_no < 0:
        # Translate "n-th page from the end" into a 1-based page number.
        page_no += total_pages + 1
    page_start = size * (page_no - 1)
    page_end = min(size * page_no, total)
    if page_no < 1 or page_start >= total:
        # Outside the result set: return a response that carries no hits.
        return fetch_window(s, 0, 0)
    # Shorter than ``size`` only for a partially filled last page.
    page_len = page_end - page_start
    records_after = total - page_end

    if page_end <= max_window:
        # Reachable from the front after all (e.g. a negative page number
        # that resolves to an early page).
        return fetch_window(s, page_start, page_len)

    if records_after + page_len <= max_window:
        # Reachable from the end: query the reversed sort, then restore the
        # forward order of the hits.
        s._sort = [s._reverse_sort_entry(se) for se in s._sort]
        resp = fetch_window(s, records_after, page_len)
        resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]
        return resp

    def fetch_chunk(anchor_row, limit, backwards):
        # Fetch ``limit`` records after (or, when ``backwards``, before)
        # ``anchor_row``; without an anchor, start at the respective edge of
        # the result set. Hits always come back in forward sort order.
        if anchor_row is not None:
            anchor_vals = s.get_sort_vals(anchor_row, s._sort)
            if backwards:
                return s.get_previous_page(first_hit=anchor_vals, size=limit)
            return s.get_next_page(last_hit=anchor_vals, size=limit)
        edge = s._clone()
        if backwards:
            edge._sort = [s._reverse_sort_entry(se) for se in s._sort]
        chunk = fetch_window(edge, 0, limit)
        if backwards:
            chunk['hits']['hits'] = chunk.to_dict()['hits']['hits'][::-1]
        return chunk

    def walk(anchor_row, skip, limit, backwards):
        # Skip ``skip`` records in chunks of at most ``max_window``, then
        # return the ``limit`` records that follow in walking direction.
        while skip > 0:
            step = min(skip, max_window)
            chunk = fetch_chunk(anchor_row, step, backwards)
            if len(chunk) == 0:
                # The result set shrank underneath us; nothing left to walk.
                return chunk
            # Continue from the row closest to the target page.
            anchor_row = chunk[0] if backwards else chunk[-1]
            skip -= step
        return fetch_chunk(anchor_row, limit, backwards)

    # Candidate routes as (records to skip, anchor row, walk backwards?).
    # The current page is listed first so it wins ties.
    routes = []
    if current_page > 0:
        if last_row is not None and page_no > current_page:
            routes.append((size * (page_no - current_page - 1), last_row, False))
        elif first_row is not None and page_no < current_page:
            routes.append((size * (current_page - page_no - 1), first_row, True))
    routes.append((page_start, None, False))
    routes.append((records_after, None, True))
    skip, anchor_row, backwards = min(routes, key=lambda route: route[0])
    return walk(anchor_row, skip, page_len, backwards)
def get_next_page(self, last_hit, size=None):
    """Run the search for the records that follow ``last_hit``.

    :param last_hit: Iterable of sort values of the last row already seen;
        it is copied into a list and sent as ``search_after``.
    :param size: Number of records to fetch. When ``None``, the ``"size"``
        entry of ``self._extra`` is used, falling back to 10.
    :return: The result of executing a clone of this search; ``self`` is
        left untouched.
    """
    if size is None:
        size = self._extra.get("size", 10)
    following = self._clone()
    # "from" is reset to 0: the position is carried by search_after alone.
    following._extra.update({
        "from": 0,
        "size": size,
        "search_after": list(last_hit),
    })
    return following.execute()
def get_previous_page(self, first_hit, size=None):
    """Run the search for the records that precede ``first_hit``.

    The query runs on a clone with every sort entry reversed and
    ``search_after`` set to ``first_hit``; the returned hits are then flipped
    back so they appear in the original sort order.

    :param first_hit: Iterable of sort values of the first row already seen.
    :param size: Number of records to fetch. When ``None``, the ``"size"``
        entry of ``self._extra`` is used, falling back to 10.
    :return: The executed response with its raw hit list reversed.
    """
    if size is None:
        size = self._extra.get("size", 10)
    preceding = self._clone()
    preceding._extra.update({
        "from": 0,
        "size": size,
        "search_after": list(first_hit),
    })
    # Walking backwards means searching "after" the anchor in flipped order.
    preceding._sort = [self._reverse_sort_entry(entry) for entry in self._sort]
    page = preceding.execute()
    # Hits arrive nearest-first; restore the caller's forward ordering.
    nearest_first = page.to_dict()['hits']['hits']
    page['hits']['hits'] = nearest_first[::-1]
    return page
 | 
implementing #802